{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !wget https://huseinhouse-data.s3-ap-southeast-1.amazonaws.com/Parl.+Replies_Verified-20191113T140600Z-001.zip\n",
    "# !wget https://huseinhouse-data.s3-ap-southeast-1.amazonaws.com/Parl.+Replies_Verified-20191113T140600Z-002.zip\n",
    "# !unzip Parl.+Replies_Verified-20191113T140600Z-001.zip\n",
    "# !unzip Parl.+Replies_Verified-20191113T140600Z-002.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "304"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from glob import glob\n",
    "\n",
    "pdfs = glob('Parl. Replies_Verified/**/*.pdf', recursive = True)\n",
    "len(pdfs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip3 install tika --user"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# wget https://www-eu.apache.org/dist/tika/tika-server-1.22.jar\n",
    "# java -jar tika-server-1.22.jar --port 9997"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tika import parser\n",
    "import tika\n",
    "tika.TikaClientOnly = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "parsed = parser.from_file(pdfs[0], 'http://localhost:9997/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cleaning\n",
    "\n",
    "from unidecode import unidecode\n",
    "\n",
    "def clean(string):\n",
    "    string = [cleaning.cleaning(s) for s in string]\n",
    "    \n",
    "    string = [s.strip() for s in string if 'tarikh' not in s.lower() and 'soalan no' not in s.lower()]\n",
    "    string = [s for s in string if not ''.join(s.split()[:1]).isdigit() and '.soalan' not in s.lower() and 'jum ' not in s.lower()]\n",
    "    string = [s for s in string if not s[:3].isdigit() and not s[-3:].isdigit()]\n",
    "    return string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  9%|▊         | 26/304 [00:22<03:02,  1.52it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "'NoneType' object has no attribute 'split'\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 19%|█▉        | 59/304 [00:38<01:20,  3.06it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "'NoneType' object has no attribute 'split'\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 21%|██        | 63/304 [00:40<01:56,  2.08it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "'NoneType' object has no attribute 'split'\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 65%|██████▍   | 197/304 [01:53<00:59,  1.80it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "'NoneType' object has no attribute 'split'\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 304/304 [02:52<00:00,  1.77it/s]\n"
     ]
    }
   ],
   "source": [
    "from tqdm import tqdm\n",
    "\n",
    "outer = []\n",
    "\n",
    "for k in tqdm(range(len(pdfs))):\n",
    "    try:\n",
    "        pdf = pdfs[k]\n",
    "        parsed = parser.from_file(pdf, 'http://localhost:9997/')\n",
    "\n",
    "        c = clean(parsed['content'].split('\\n'))\n",
    "        t, last = [], 0\n",
    "\n",
    "        i = 0\n",
    "        while i < len(c):\n",
    "            text = c[i]\n",
    "\n",
    "            if len(text) > 5:\n",
    "                if len(text.split()) > 1:\n",
    "                    t.append(text)\n",
    "                last = i\n",
    "            else:\n",
    "                if len(t) and (i - last) > 2:\n",
    "                    t.append('')\n",
    "                    outer.extend(t)\n",
    "                    t = []\n",
    "                    last = i\n",
    "                elif not len(t):\n",
    "                    last = i\n",
    "\n",
    "            i += 1\n",
    "\n",
    "        if len(t):\n",
    "            t.append('')\n",
    "            outer.extend(t)\n",
    "    except Exception as e:\n",
    "        print(e)\n",
    "        pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "960869"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(outer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 351 ms, sys: 284 ms, total: 635 ms\n",
      "Wall time: 744 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "temp_vocab = list(set(cleaning.multiprocessing(outer, cleaning.unique_words)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "630\n",
      "CPU times: user 74.9 ms, sys: 113 ms, total: 188 ms\n",
      "Wall time: 555 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# important\n",
    "temp_dict = cleaning.multiprocessing(temp_vocab, cleaning.duplicate_dots_marks_exclamations, list_mode = False)\n",
    "print(len(temp_dict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 960869/960869 [00:02<00:00, 336637.78it/s]\n"
     ]
    }
   ],
   "source": [
    "outer = cleaning.string_dict_cleaning(outer, temp_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "200\n",
      "CPU times: user 65.8 ms, sys: 116 ms, total: 181 ms\n",
      "Wall time: 255 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# important\n",
    "temp_dict = cleaning.multiprocessing(temp_vocab, cleaning.remove_underscore, list_mode = False)\n",
    "print(len(temp_dict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 960869/960869 [00:02<00:00, 340252.31it/s]\n"
     ]
    }
   ],
   "source": [
    "outer = cleaning.string_dict_cleaning(outer, temp_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "CPU times: user 217 ms, sys: 184 ms, total: 402 ms\n",
      "Wall time: 1.1 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# important\n",
    "temp_dict = cleaning.multiprocessing(outer, cleaning.isolate_spamchars, list_mode = False)\n",
    "print(len(temp_dict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4944\n",
      "CPU times: user 66.3 ms, sys: 132 ms, total: 198 ms\n",
      "Wall time: 271 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "temp_dict = cleaning.multiprocessing(temp_vocab, cleaning.break_short_words, list_mode = False)\n",
    "print(len(temp_dict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 960869/960869 [00:02<00:00, 333845.07it/s]\n"
     ]
    }
   ],
   "source": [
    "outer = cleaning.string_dict_cleaning(outer, temp_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "695\n",
      "CPU times: user 99.3 ms, sys: 96.3 ms, total: 196 ms\n",
      "Wall time: 238 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "temp_dict = cleaning.multiprocessing(temp_vocab, cleaning.break_long_words, list_mode = False)\n",
    "print(len(temp_dict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 960869/960869 [00:02<00:00, 349343.64it/s]\n"
     ]
    }
   ],
   "source": [
    "outer = cleaning.string_dict_cleaning(outer, temp_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "230\n",
      "CPU times: user 73.4 ms, sys: 120 ms, total: 193 ms\n",
      "Wall time: 270 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "temp_dict = cleaning.multiprocessing(temp_vocab, cleaning.remove_ending_underscore, list_mode = False)\n",
    "print(len(temp_dict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 960869/960869 [00:02<00:00, 339206.66it/s]\n"
     ]
    }
   ],
   "source": [
    "outer = cleaning.string_dict_cleaning(outer, temp_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "160\n",
      "CPU times: user 66.4 ms, sys: 121 ms, total: 187 ms\n",
      "Wall time: 264 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "temp_dict = cleaning.multiprocessing(temp_vocab, cleaning.remove_starting_underscore, list_mode = False)\n",
    "print(len(temp_dict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 960869/960869 [00:02<00:00, 336362.07it/s]\n"
     ]
    }
   ],
   "source": [
    "outer = cleaning.string_dict_cleaning(outer, temp_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "70969\n",
      "CPU times: user 294 ms, sys: 132 ms, total: 426 ms\n",
      "Wall time: 533 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "temp_dict = cleaning.multiprocessing(temp_vocab, cleaning.end_punct, list_mode = False)\n",
    "print(len(temp_dict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 960869/960869 [00:03<00:00, 312211.61it/s]\n"
     ]
    }
   ],
   "source": [
    "outer = cleaning.string_dict_cleaning(outer, temp_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "30336\n",
      "CPU times: user 167 ms, sys: 120 ms, total: 287 ms\n",
      "Wall time: 344 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "temp_dict = cleaning.multiprocessing(temp_vocab, cleaning.start_punct, list_mode = False)\n",
    "print(len(temp_dict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 960869/960869 [00:03<00:00, 306990.19it/s]\n"
     ]
    }
   ],
   "source": [
    "outer = cleaning.string_dict_cleaning(outer, temp_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1238\n",
      "CPU times: user 73.5 ms, sys: 128 ms, total: 201 ms\n",
      "Wall time: 275 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "temp_dict = cleaning.multiprocessing(temp_vocab, cleaning.join_dashes, list_mode = False)\n",
    "print(len(temp_dict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 960869/960869 [00:03<00:00, 315174.82it/s]\n"
     ]
    }
   ],
   "source": [
    "outer = cleaning.string_dict_cleaning(outer, temp_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Tuan Ahmad Baihaki Bin Atiqullah [ Kubang Kerian ] minta',\n",
       " \"' PERDANA MENTERI menyatakan apakah status pembinaan\",\n",
       " 'Terminal Bersepadu Gombak ( ITT ) yang masih belum siap sehingga',\n",
       " '',\n",
       " \"JAWAPAN : YB DATO ' SRI HAJAH NANCY SHUKRI\",\n",
       " 'MENTERI DI JABATAN PERDANA MENTERI',\n",
       " 'Tuan Yang di-Pertua ,',\n",
       " '1 . Untuk makluman Ahli Yang Berhormat , pembinaan Terminal',\n",
       " 'Bersepadu ( Integrated Transport Terminal - ITT ) Gombak telah',\n",
       " 'dirancang oleh Kerajaan sebagai salah satu projek di bawah Bidang',\n",
       " 'Keberhasilan Utama Negara ( National Key Result Area - NKRA ).',\n",
       " '2 . ITT Gombak ini akan menyediakan kemudahan bagi',\n",
       " 'perkhidmatan bas ekspres untuk koridor Timur yang merangkumi',\n",
       " 'negeri Pahang , Terengganu dan Kelantan . Lokasi ITT Gombak dipilih',\n",
       " 'berdasarkan kebolehcapaiannya serta pengurangan kesesakan jalan',\n",
       " 'raya kepada orang ramai dan kesepaduannya (intergration ) dengan',\n",
       " 'stesen LRT Gombak apabila ia siap dibina kelak .',\n",
       " '3 . Pada masa ini , kerja-kerja awalan sedang dijalankan termasuk',\n",
       " 'pembersihan tapak , pengalihan utiliti serta kerja menaik taraf Jalan',\n",
       " 'Lingkaran Tengah Kedua {MRR2 ) sebagai jalan masuk ke tapak',\n",
       " 'projek . Projek ini dijangka akan bermula di tapak pada bulan',\n",
       " 'Julai 2017 dan dijangka akan siap pada tahun 2019 .',\n",
       " 'Sekian , terima kasih .',\n",
       " '',\n",
       " 'SO ALAN',\n",
       " 'PEMBERITAHUAN PERTANYAAN',\n",
       " 'DEWAN RAKYAT',\n",
       " 'BAGI BUKAN JAWAB LISAN',\n",
       " 'TUAN AHMAD BAIHAKI BIN ATIQULLAH',\n",
       " '[ KUBANG KERIAN ]',\n",
       " 'Tuan Ahmad Baihaki Bin Atiqullah [ Kubang Kerian ] minta MENTERI',\n",
       " 'PENGANGKUTAN menyatakan apakah punca Kerajaan mengharamkan',\n",
       " 'perkhidmatan bonceng motosikal \" Dego Ride \".',\n",
       " '',\n",
       " 'Ahli Yang Berhormat ,',\n",
       " 'Mengikut statistik tahun 2016 , lebih kurang 62 peratus daripada lebih',\n",
       " 'kurang 7 , 152 jiwa yang terkorban di atas jalan raya adalah melibatkan',\n",
       " 'penunggang dan pembonceng motosikal . Mengikut analisa yang',\n",
       " 'dijalankan oleh lnstitut Penyelidikan Keselamatan Jalan Raya ( MIROS ),',\n",
       " 'risiko pengguna motosikal untuk terlibat dalam kemalangan maut dan',\n",
       " 'kemalangan yang mengakibatkan cedera parah adalah 42.5 kali lebih',\n",
       " 'tinggi daripada bas dan 16 kali lebih tinggi daripada kereta . Justeru ,',\n",
       " 'penggunaan Dego Ride akan mengakibatkan lebih ramai pengguna jalan',\n",
       " 'raya terdedah kepada risiko kemalangan maut dan kecederaan parah .',\n",
       " 'Sehubungan dengan itu , mengambil kira faktor-faktor keselamatan',\n",
       " 'penunggang dan pembonceng motosikal , maka Kementerian',\n",
       " 'Pengangkutan telah memutuskan agar tidak membenarkan Dego Ride',\n",
       " 'beroperasi di jalan raya .',\n",
       " '',\n",
       " 'PEMBERITAHUAN PERTANYAAN DEWAN RAKYAT',\n",
       " 'PERTANYAAN BUKAN JAWAB LISAN',\n",
       " 'DARIPADA TUAN IDRIS BIN AHMAD',\n",
       " 'Tuan Idris Bin Ahmad [ Bukit Gantang ] minta MENTERI PENDIDIKAN',\n",
       " 'TINGGI menyatakan berapakah jumlah pemegang ijazah doktor falsafah',\n",
       " '( PhD ) daripada kalangan Orang Kelainan Upaya ( OKU ) dan berapa ramai',\n",
       " 'mereka yang bekerja di dalam perkhidmatan awam .',\n",
       " 'Tuan Yang di-Pertua ,',\n",
       " 'Untuk makluman Ahli Yang Berhormat , berdasarkan Kajian Pengesanan',\n",
       " 'Graduan yang dijalankan oleh Kementerian Pendidikan Tinggi , pada',\n",
       " 'tahun 2013 hingga 2016 seramai 27 orang graduan dari Orang Kelainan',\n",
       " 'Upaya ( OKU ) telah berjaya menamatkan pengajian di peringkat ljazah',\n",
       " 'Doktor Falsafah ( PhD ) di Universiti Awam ( UA ) dan lnstitusi Pendidikan',\n",
       " 'Tinggi Swasta (IPTS ). Kajian tersebut juga menyatakan bahawa seramai',\n",
       " '',\n",
       " 'PEMBERITAHUAN PERTANYAAN BAGI BUKAN JAWAB LISAN',\n",
       " 'DEWAN RAKYAT',\n",
       " 'PERTANYAAN BUKAN JAWAB LISAN',\n",
       " 'DARIPADA TUAN IDRIS BIN AHMAD',\n",
       " 'Tuan Idris Bin Ahmad [ Bukit Gantang ] minta MENTERI DALAM',\n",
       " 'NEGERI menyatakan senarai syarikat pekerja asing yang diberi lesen',\n",
       " 'untuk membawa buruh asing ke negara ini sejak tahun 2000 hingga',\n",
       " 'yang telah ditetapkan .',\n",
       " '',\n",
       " 'Jumlah syarikat outsourcing yang telah diluluskan dan berdaftar',\n",
       " 'dengan Kementerian Dalam Negeri adalah sebanyak 279 buah',\n",
       " 'Waiau bagaimanapun , pada tahun 2007 , Kerajaan telah',\n",
       " 'memutuskan untuk membekukan kelulusan baru bagi',\n",
       " 'mendapatkan lesen membekal dan mengurus pekerja asing dan',\n",
       " 'pembekuan ini masih kekal sehingga kini .',\n",
       " '',\n",
       " 'PEMBERITAHUAN PERTANYAAN DEWAN RAKYAT',\n",
       " 'JAWAPAN OLEH YB DATUK SERI DR . S . SUBRAMANIAM',\n",
       " 'MENTERI KESIHATAN MALAYSIA',\n",
       " 'SO ALAN',\n",
       " 'BUKAN LISAN',\n",
       " 'YB TUAN IDRIS BIN AHMAD',\n",
       " '[ BUKIT GANTANG ]',\n",
       " 'YB Tuan Idris bin Ahmad [ Bukit Gantang ] minta Menteri Kesihatan',\n",
       " 'menyatakan berapa jumlah bil tertunggak yang tidak dibayar oleh',\n",
       " 'pendatang asing di hospital Kerajaan sejak tahun 2010 - 2016 dan',\n",
       " 'apakah langkah yang diambil untuk niengatasi masalah ini .',\n",
       " 'Tuan Yang di-Pertua ,',\n",
       " '',\n",
       " 'sehingga RM50.5juta pada tahun 2016 .',\n",
       " 'Tuan Yang di-Pertua ,',\n",
       " 'Kementerian Kesihatan Malaysia ( KKM ) telah mewujudkan Standard',\n",
       " 'Operating Procedure (SOP ) yang perlu diteliti oleh semua pegawai yang -',\n",
       " 'bertanggungjawab mengutip hasil di fasiliti kesihatan dalam usaha untuk',\n",
       " 'mengurangkan jumlah bil tertunggak melibatkan warga asing .',\n",
       " '']"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "outer[-100:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('dumping-parliament.txt', 'w') as fopen:\n",
    "    fopen.write('\\n'.join(outer))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
